1. Data

Load the student scores for the test - here we load the ETH Zurich test data, downloaded from https://pontifex.ethz.ch/s21t5/rate/

# Preview the "pre" test responses: one row per student, item columns A1-A36.
# Scores appear to be 0/1 with occasional 2s — presumably partial/extra credit;
# TODO confirm the scoring scheme.
head(test_scores_pre)
## # A tibble: 6 x 38
##   year  class     A1    A2    A3    A4    A5    A6    A7    A8    A9   A10   A11
##   <chr> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017  s21t-~     1     0     1     1     1     0     1     0     1     0     1
## 2 2017  s21t-~     1     0     1     1     1     1     0     1     1     2     1
## 3 2017  s21t-~     1     0     0     0     1     1     1     0     1     1     1
## 4 2017  s21t-~     1     0     1     1     1     1     1     1     1     1     1
## 5 2017  s21t-~     1     0     1     0     2     0     1     0     1     0     2
## 6 2017  s21t-~     0     1     0     0     1     2     0     2     2     2     1
## # ... with 25 more variables: A12 <dbl>, A13 <dbl>, A14 <dbl>, A15 <dbl>,
## #   A16 <dbl>, A17 <dbl>, A18 <dbl>, A19 <dbl>, A20 <dbl>, A21 <dbl>,
## #   A22 <dbl>, A23 <dbl>, A24 <dbl>, A25 <dbl>, A26 <dbl>, A27 <dbl>,
## #   A28 <dbl>, A29 <dbl>, A30 <dbl>, A31 <dbl>, A32 <dbl>, A33 <dbl>,
## #   A34 <dbl>, A35 <dbl>, A36 <dbl>
# Preview the "post" test responses: same layout but item columns B1-B32.
head(test_scores_post)
## # A tibble: 6 x 34
##   year  class     B1    B2    B3    B4    B5    B6    B7    B8    B9   B10   B11
##   <chr> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2019  s21t-~     1     1     1     1     1     1     1     1     1     1     0
## 2 2019  s21t-~     2     1     0     0     0     1     2     1     2     0     1
## 3 2019  s21t-~     1     1     0     1     1     1     1     1     1     1     0
## 4 2019  s21t-~     1     1     1     0     1     0     0     0     1     1     1
## 5 2019  s21t-~     1     0     1     0     0     1     1     0     0     1     0
## 6 2019  s21t-~     1     0     0     0     1     1     0     0     1     1     0
## # ... with 21 more variables: B12 <dbl>, B13 <dbl>, B14 <dbl>, B15 <dbl>,
## #   B16 <dbl>, B17 <dbl>, B18 <dbl>, B19 <dbl>, B20 <dbl>, B21 <dbl>,
## #   B22 <dbl>, B23 <dbl>, B24 <dbl>, B25 <dbl>, B26 <dbl>, B27 <dbl>,
## #   B28 <dbl>, B29 <dbl>, B30 <dbl>, B31 <dbl>, B32 <dbl>

Data summary

The number of responses from each cohort:

# Number of responses per cohort, shown as a colour-graded table
# (darker blue = larger cohort).
test_scores %>%
  # count(year) is shorthand for group_by(year) %>% tally()
  count(year) %>%
  gt() %>%
  data_color(
    columns = c("n"),
    colors = scales::col_numeric(palette = c("Blues"), domain = NULL)
  )
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
year n
2017 1678
2018 1746
2019 1995
2020 2188
2021 2041

Mean and standard deviation for each item:

# Per-item summary statistics for each cohort.
# skim_without_charts() produces one row per (group, variable); we keep only
# n_missing, complete_rate, mean and sd by dropping the character.* columns,
# the numeric percentile columns (numeric.p*) and the skim_type column.
# NOTE(review): the second group_by(year) re-establishes grouping after the
# skimr output (so gt() renders one section per year) — confirm skimr drops
# the grouping here, otherwise it is redundant.
test_scores %>% 
  select(-class) %>% 
  group_by(year) %>% 
  skim_without_charts() %>% 
  select(-contains("character."), -contains("numeric.p"), -skim_type) %>% 
  group_by(year) %>% 
  gt() %>% 
  fmt_number(columns = contains("numeric"), decimals = 3) %>%
  # green colour scale on the item means
  data_color(
    columns = c("numeric.mean"),
    colors = scales::col_numeric(palette = c("Greens"), domain = NULL)
  ) %>%
  cols_label(
    numeric.mean = "Mean",
    numeric.sd = "SD"
  )
skim_variable n_missing complete_rate Mean SD
2017
test_version 0 1.0000000 NA NA
A1_B1 15 0.9910608 0.960 0.195
A2_B0 131 0.9219309 0.333 0.471
A3_B2 77 0.9541120 0.655 0.476
A4_B3 27 0.9839094 0.655 0.476
A5_B4 544 0.6758045 0.692 0.462
A6_B5 362 0.7842670 0.901 0.298
A7_B6 19 0.9886770 0.716 0.451
A8_B7 311 0.8146603 0.583 0.493
A9_B8 613 0.6346841 0.731 0.444
A10_B9 452 0.7306317 0.745 0.436
A11_B10 352 0.7902265 0.729 0.445
A12_B0 216 0.8712753 0.784 0.412
A13_B0 211 0.8742551 0.429 0.495
A14_B12 237 0.8587604 0.653 0.476
A15_B13 269 0.8396901 0.551 0.498
A16_B14 427 0.7455304 0.249 0.433
A17_B15 154 0.9082241 0.649 0.477
A18_B16 468 0.7210965 0.529 0.499
A19_B0 566 0.6626937 0.533 0.499
A20_B17 108 0.9356377 0.776 0.417
A21_B18 408 0.7568534 0.674 0.469
A22_B19 477 0.7157330 0.723 0.448
A23_B20 492 0.7067938 0.584 0.493
A24_B21 326 0.8057211 0.730 0.444
A25_B0 78 0.9535161 0.568 0.495
A26_B22 250 0.8510131 0.938 0.241
A27_B23 590 0.6483909 0.603 0.490
A28_B24 647 0.6144219 0.686 0.464
A29_B25 373 0.7777116 0.624 0.485
A30_B26 145 0.9135876 0.830 0.376
A31_B27 214 0.8724672 0.432 0.495
A32_B28 466 0.7222884 0.315 0.465
A33_B29 57 0.9660310 0.808 0.394
A34_B30 497 0.7038141 0.826 0.379
A35_B0 886 0.4719905 0.688 0.464
A36_B0 830 0.5053635 0.436 0.496
A0_B11 1678 0.0000000 NaN NA
A0_B31 1678 0.0000000 NaN NA
A0_B32 1678 0.0000000 NaN NA
2018
test_version 0 1.0000000 NA NA
A1_B1 16 0.9908362 0.964 0.186
A2_B0 97 0.9444444 0.300 0.458
A3_B2 37 0.9788087 0.658 0.475
A4_B3 23 0.9868270 0.634 0.482
A5_B4 441 0.7474227 0.659 0.474
A6_B5 269 0.8459336 0.870 0.336
A7_B6 13 0.9925544 0.711 0.453
A8_B7 199 0.8860252 0.561 0.496
A9_B8 549 0.6855670 0.673 0.469
A10_B9 367 0.7898053 0.687 0.464
A11_B10 293 0.8321879 0.700 0.458
A12_B0 127 0.9272623 0.770 0.421
A13_B0 188 0.8923253 0.401 0.490
A14_B12 192 0.8900344 0.645 0.479
A15_B13 220 0.8739977 0.530 0.499
A16_B14 341 0.8046964 0.250 0.433
A17_B15 120 0.9312715 0.648 0.478
A18_B16 372 0.7869416 0.463 0.499
A19_B0 468 0.7319588 0.468 0.499
A20_B17 85 0.9513173 0.762 0.426
A21_B18 336 0.8075601 0.646 0.478
A22_B19 420 0.7594502 0.682 0.466
A23_B20 397 0.7726231 0.558 0.497
A24_B21 260 0.8510882 0.678 0.467
A25_B0 46 0.9736541 0.753 0.431
A26_B22 230 0.8682703 0.929 0.256
A27_B23 531 0.6958763 0.576 0.494
A28_B24 529 0.6970218 0.655 0.476
A29_B25 319 0.8172967 0.633 0.482
A30_B26 121 0.9306987 0.786 0.410
A31_B27 166 0.9049255 0.411 0.492
A32_B28 436 0.7502864 0.272 0.445
A33_B29 63 0.9639175 0.791 0.407
A34_B30 425 0.7565865 0.815 0.389
A35_B0 797 0.5435281 0.612 0.488
A36_B0 741 0.5756014 0.397 0.490
A0_B11 1746 0.0000000 NaN NA
A0_B31 1746 0.0000000 NaN NA
A0_B32 1746 0.0000000 NaN NA
2019
test_version 0 1.0000000 NA NA
A1_B1 12 0.9939850 0.963 0.188
A2_B0 1995 0.0000000 NaN NA
A3_B2 29 0.9854637 0.678 0.467
A4_B3 35 0.9824561 0.655 0.476
A5_B4 513 0.7428571 0.661 0.473
A6_B5 296 0.8516291 0.875 0.331
A7_B6 25 0.9874687 0.701 0.458
A8_B7 195 0.9022556 0.582 0.493
A9_B8 628 0.6852130 0.670 0.470
A10_B9 443 0.7779449 0.713 0.453
A11_B10 299 0.8501253 0.685 0.465
A12_B0 1995 0.0000000 NaN NA
A13_B0 1995 0.0000000 NaN NA
A14_B12 229 0.8852130 0.664 0.473
A15_B13 278 0.8606516 0.550 0.498
A16_B14 372 0.8135338 0.251 0.434
A17_B15 133 0.9333333 0.641 0.480
A18_B16 487 0.7558897 0.366 0.482
A19_B0 1995 0.0000000 NaN NA
A20_B17 101 0.9493734 0.778 0.416
A21_B18 368 0.8155388 0.661 0.473
A22_B19 453 0.7729323 0.669 0.471
A23_B20 439 0.7799499 0.540 0.499
A24_B21 268 0.8656642 0.690 0.463
A25_B0 1995 0.0000000 NaN NA
A26_B22 261 0.8691729 0.927 0.261
A27_B23 580 0.7092732 0.595 0.491
A28_B24 600 0.6992481 0.649 0.478
A29_B25 344 0.8275689 0.614 0.487
A30_B26 240 0.8796992 0.612 0.487
A31_B27 192 0.9037594 0.438 0.496
A32_B28 446 0.7764411 0.260 0.439
A33_B29 69 0.9654135 0.799 0.401
A34_B30 420 0.7894737 0.770 0.421
A35_B0 1995 0.0000000 NaN NA
A36_B0 1995 0.0000000 NaN NA
A0_B11 124 0.9378446 0.607 0.489
A0_B31 618 0.6902256 0.502 0.500
A0_B32 1179 0.4090226 0.488 0.500
2020
test_version 0 1.0000000 NA NA
A1_B1 13 0.9940585 0.963 0.188
A2_B0 2188 0.0000000 NaN NA
A3_B2 48 0.9780622 0.671 0.470
A4_B3 31 0.9858318 0.632 0.482
A5_B4 592 0.7294333 0.670 0.470
A6_B5 299 0.8633455 0.886 0.318
A7_B6 20 0.9908592 0.711 0.453
A8_B7 283 0.8706581 0.691 0.462
A9_B8 661 0.6978976 0.623 0.485
A10_B9 481 0.7801645 0.705 0.456
A11_B10 354 0.8382084 0.710 0.454
A12_B0 2188 0.0000000 NaN NA
A13_B0 2188 0.0000000 NaN NA
A14_B12 253 0.8843693 0.613 0.487
A15_B13 279 0.8724863 0.524 0.500
A16_B14 410 0.8126143 0.243 0.429
A17_B15 139 0.9364717 0.632 0.483
A18_B16 492 0.7751371 0.351 0.478
A19_B0 2188 0.0000000 NaN NA
A20_B17 100 0.9542962 0.741 0.438
A21_B18 401 0.8167276 0.632 0.482
A22_B19 481 0.7801645 0.664 0.473
A23_B20 512 0.7659963 0.546 0.498
A24_B21 313 0.8569470 0.659 0.474
A25_B0 2188 0.0000000 NaN NA
A26_B22 276 0.8738574 0.929 0.256
A27_B23 635 0.7097806 0.558 0.497
A28_B24 710 0.6755027 0.616 0.487
A29_B25 409 0.8130713 0.604 0.489
A30_B26 288 0.8683729 0.615 0.487
A31_B27 268 0.8775137 0.461 0.499
A32_B28 529 0.7582267 0.274 0.446
A33_B29 76 0.9652651 0.763 0.425
A34_B30 464 0.7879342 0.770 0.421
A35_B0 2188 0.0000000 NaN NA
A36_B0 2188 0.0000000 NaN NA
A0_B11 123 0.9437843 0.630 0.483
A0_B31 676 0.6910420 0.472 0.499
A0_B32 1266 0.4213894 0.507 0.500
2021
test_version 0 1.0000000 NA NA
A1_B1 16 0.9921607 0.963 0.189
A2_B0 2041 0.0000000 NaN NA
A3_B2 35 0.9828515 0.680 0.466
A4_B3 42 0.9794219 0.617 0.486
A5_B4 513 0.7486526 0.657 0.475
A6_B5 297 0.8544831 0.872 0.334
A7_B6 16 0.9921607 0.687 0.464
A8_B7 261 0.8721215 0.669 0.471
A9_B8 606 0.7030867 0.606 0.489
A10_B9 474 0.7677609 0.690 0.462
A11_B10 360 0.8236159 0.694 0.461
A12_B0 2041 0.0000000 NaN NA
A13_B0 2041 0.0000000 NaN NA
A14_B12 260 0.8726115 0.616 0.487
A15_B13 267 0.8691818 0.525 0.500
A16_B14 404 0.8020578 0.246 0.431
A17_B15 122 0.9402254 0.611 0.488
A18_B16 449 0.7800098 0.358 0.480
A19_B0 2041 0.0000000 NaN NA
A20_B17 105 0.9485546 0.750 0.433
A21_B18 405 0.8015679 0.639 0.481
A22_B19 479 0.7653111 0.653 0.476
A23_B20 461 0.7741303 0.549 0.498
A24_B21 325 0.8407643 0.681 0.466
A25_B0 2041 0.0000000 NaN NA
A26_B22 259 0.8731014 0.919 0.273
A27_B23 609 0.7016169 0.585 0.493
A28_B24 646 0.6834885 0.602 0.490
A29_B25 387 0.8103871 0.619 0.486
A30_B26 290 0.8579128 0.613 0.487
A31_B27 280 0.8628123 0.469 0.499
A32_B28 485 0.7623714 0.269 0.443
A33_B29 79 0.9612935 0.783 0.412
A34_B30 451 0.7790299 0.771 0.420
A35_B0 2041 0.0000000 NaN NA
A36_B0 2041 0.0000000 NaN NA
A0_B11 106 0.9480647 0.613 0.487
A0_B31 519 0.7457129 0.604 0.489
A0_B32 1154 0.4345909 0.488 0.500

2. Testing assumptions

Before applying IRT, we should check that the data satisfies the assumptions needed by the model. In particular, to use a 1-dimensional IRT model, we should have some evidence of unidimensionality in the test scores.

Local independence

This plot shows the correlations between scores on each pair of items – note that it is restricted to only those items that appear on both versions of the test, since the plotting package did not deal well with missing data:

# Keep only the item-score columns (drop the identifier columns).
item_scores <- test_scores %>%
  select(-class, -year, -test_version)

# Restrict to items that appear on both test versions: items labelled with
# "A0" or "B0" exist in only one version, so they are excluded here.
item_scores_unchanged_only <- item_scores %>%
  select(!contains("B0") & !contains("A0"))

# Pairwise item correlations with confidence intervals (psych::corCi),
# computed without the default plot so we can draw it separately below.
cor_ci <- psych::corCi(item_scores_unchanged_only, plot = FALSE)

# Correlation matrix plot showing upper/lower CI bounds in the two triangles.
psych::cor.plot.upperLowerCi(cor_ci)

There are a few correlations that are not significantly different from 0:

# Table of item pairs whose correlation is not significantly different
# from zero (p > 0.05), sorted from least to most significant.
cor_ci$ci %>%
  as_tibble(rownames = "corr") %>%
  select(-contains(".e")) %>%
  filter(p > 0.05) %>%
  arrange(desc(p)) %>%
  gt() %>%
  fmt_number(columns = 2:4, decimals = 3)
corr lower upper p
A1_B1-A29_B −0.016 0.032 0.535
A29_B-A31_B −0.040 0.013 0.305
A24_B-A29_B −0.008 0.039 0.203

Here we redo the correlation calculations with all the items, and check that there are still only a few cases where the correlations are close to 0:

# Repeat the correlation/CI calculation on the full item set
# (including the version-specific items), then list the pairs whose
# correlation is not significantly different from zero.
cor_ci2 <- psych::corCi(item_scores, plot = FALSE)

cor_ci2$ci %>%
  as_tibble(rownames = "corr") %>%
  select(-contains(".e")) %>%
  filter(p > 0.05) %>%
  arrange(desc(p)) %>%
  gt() %>%
  fmt_number(columns = 2:4, decimals = 3)
corr lower upper p
A1_B1-A29_B −0.014 0.029 0.494
A2_B0-A34_B −0.019 0.063 0.284
A24_B-A29_B −0.009 0.040 0.215
A29_B-A31_B −0.037 0.008 0.206
A1_B1-A2_B0 −0.011 0.061 0.176
A2_B0-A25_B −0.010 0.070 0.142
A1_B1-A12_B −0.006 0.076 0.096
A25_B-A35_B −0.004 0.098 0.069
A12_B-A34_B −0.002 0.093 0.059

The overall picture is that the item scores are well correlated with each other.

Dimensionality

Here we again focus on the subset of items that appeared in both tests.

# Check whether the data is suitable for factor analysis (KMO measure and
# Bartlett's test of sphericity) and estimate the number of latent factors
# by a battery of methods. NOTE(review): these helpers are presumably from
# the easystats `parameters`/`performance` packages — confirm against the
# library() calls earlier in the document.
structure <- check_factorstructure(item_scores_unchanged_only)
n <- n_factors(item_scores_unchanged_only)

Is the data suitable for Factor Analysis?

  • KMO: The Kaiser, Meyer, Olkin (KMO) measure of sampling adequacy suggests that data seems appropriate for factor analysis (KMO = 0.95).
  • Sphericity: Bartlett’s test of sphericity suggests that there is sufficient significant correlation in the data for factor analysis (Chisq(406) = 41570.34, p < .001).

Method Agreement Procedure:

The choice of 1 dimensions is supported by 6 (26.09%) methods out of 23 (Acceleration factor, R2, VSS complexity 1, Velicer’s MAP, TLI, RMSEA).

# Visual summary of how many methods support each candidate factor count.
plot(n)

# Same information as a table: n_Factors vs how many methods chose it.
summary(n) %>% gt()
n_Factors n_Methods
1 6
2 2
3 1
4 6
7 1
11 2
19 1
21 1
28 3
#n %>% tibble() %>% gt()
# Cross-check with psych's parallel analysis (scree + simulated data).
fa.parallel(item_scores_unchanged_only, fa = "fa")

## Parallel analysis suggests that the number of factors =  8  and the number of components =  NA

1 Factor

# factanal() cannot handle missing values, so replace every NA with 0.
# NOTE(review): this treats "item not attempted" identically to "answered
# incorrectly", which may bias the loadings for items with many missing
# responses — confirm this is the intended convention.
item_scores_unchanged_only_and_no_na <- item_scores_unchanged_only %>%
  mutate(
    across(everything(), ~replace_na(.x, 0))
  )
# Fit a single-factor maximum-likelihood factor analysis.
# (With one factor the rotation has no effect, but it is kept explicit.)
fitfact <- factanal(item_scores_unchanged_only_and_no_na, factors = 1, rotation = "varimax")
# Print sorted loadings, hiding loadings below 0.3.
print(fitfact, digits = 2, cutoff = 0.3, sort = TRUE)
## 
## Call:
## factanal(x = item_scores_unchanged_only_and_no_na, factors = 1,     rotation = "varimax")
## 
## Uniquenesses:
##   A1_B1   A3_B2   A4_B3   A5_B4   A6_B5   A7_B6   A8_B7   A9_B8  A10_B9 A11_B10 
##    0.96    0.77    0.80    0.73    0.77    0.77    0.79    0.62    0.66    0.69 
## A14_B12 A15_B13 A16_B14 A17_B15 A18_B16 A20_B17 A21_B18 A22_B19 A23_B20 A24_B21 
##    0.72    0.88    0.82    0.78    0.84    0.65    0.59    0.61    0.62    0.74 
## A26_B22 A27_B23 A28_B24 A29_B25 A30_B26 A31_B27 A32_B28 A33_B29 A34_B30 
##    0.70    0.64    0.64    0.87    0.82    0.77    0.80    0.88    0.88 
## 
## Loadings:
##  [1] 0.52 0.62 0.59 0.56 0.53 0.59 0.64 0.62 0.61 0.51 0.55 0.60 0.60      0.48
## [16] 0.45 0.48 0.48 0.46 0.35 0.42 0.46 0.40 0.36 0.42 0.48 0.45 0.35 0.34
## 
##                Factor1
## SS loadings       7.19
## Proportion Var    0.25
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 5073.36 on 377 degrees of freedom.
## The p-value is 0
# Tidy the factor-analysis output into a data frame: one row per item, with
# the item name in `variable` and the factor-1 loading in `fl1`.
load <- tidy(fitfact)

ggplot(load, aes(x = fl1, y = 0)) + 
  geom_point() + 
  # Label each point with its actual item name. The previous
  # paste0("A", rownames(load)) built sequential labels A1..A29 from tibble
  # row numbers, which do not match the real item IDs here because several
  # items (e.g. A2, A12, A13) are excluded from this subset.
  geom_label_repel(aes(label = variable), show.legend = FALSE) +
  labs(x = "Factor 1", y = NULL,
       title = "Standardised Loadings", 
       subtitle = "Based upon correlation matrix") +
  theme_minimal()
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# To proceed with the IRT analysis, comment out the following line before knitting
#knitr::knit_exit()

3. Fitting 2 parameter logistic MIRT model

We can fit a Multidimensional Item Response Theory (mirt) model. From the function definition:

mirt fits a maximum likelihood (or maximum a posteriori) factor analysis model to any mixture of dichotomous and polytomous data under the item response theory paradigm using either Cai's (2010) Metropolis-Hastings Robbins-Monro (MHRM) algorithm.

The process is to first fit the model, and save the result as a model object that we can then parse to get tabular or visual displays of the model that we might want. When fitting the model, we have the option to specify a few arguments, which then get interpreted as parameters to be passed to the model.

# Fit a unidimensional 2-parameter-logistic IRT model to all item columns.
# Missing responses are handled by mirt itself (full-information estimation).
fit_2pl <- mirt(
  data = item_scores, # just the columns with question scores
  #removeEmptyRows = TRUE, 
  model = 1,          # number of factors to extract
  itemtype = "2PL",   # 2 parameter logistic model
  SE = TRUE           # estimate standard errors
  )
## 
Iteration: 1, Log-Lik: -153078.292, Max-Change: 3.97531
Iteration: 2, Log-Lik: -144683.865, Max-Change: 3.18540
Iteration: 3, Log-Lik: -143129.397, Max-Change: 0.51288
Iteration: 4, Log-Lik: -142627.917, Max-Change: 0.56576
Iteration: 5, Log-Lik: -142393.297, Max-Change: 0.16584
Iteration: 6, Log-Lik: -142217.904, Max-Change: 0.23612
Iteration: 7, Log-Lik: -142107.527, Max-Change: 0.10023
Iteration: 8, Log-Lik: -142014.839, Max-Change: 0.13562
Iteration: 9, Log-Lik: -141948.534, Max-Change: 0.06224
Iteration: 10, Log-Lik: -141891.498, Max-Change: 0.09381
Iteration: 11, Log-Lik: -141852.472, Max-Change: 0.06481
Iteration: 12, Log-Lik: -141816.748, Max-Change: 0.06046
Iteration: 13, Log-Lik: -141792.864, Max-Change: 0.03712
Iteration: 14, Log-Lik: -141771.799, Max-Change: 0.04925
Iteration: 15, Log-Lik: -141757.902, Max-Change: 0.03397
Iteration: 16, Log-Lik: -141746.237, Max-Change: 0.02459
Iteration: 17, Log-Lik: -141737.341, Max-Change: 0.02003
Iteration: 18, Log-Lik: -141730.332, Max-Change: 0.01591
Iteration: 19, Log-Lik: -141724.992, Max-Change: 0.01455
Iteration: 20, Log-Lik: -141720.793, Max-Change: 0.01250
Iteration: 21, Log-Lik: -141717.574, Max-Change: 0.01163
Iteration: 22, Log-Lik: -141711.914, Max-Change: 0.01239
Iteration: 23, Log-Lik: -141710.375, Max-Change: 0.00837
Iteration: 24, Log-Lik: -141709.314, Max-Change: 0.00617
Iteration: 25, Log-Lik: -141707.449, Max-Change: 0.00420
Iteration: 26, Log-Lik: -141707.143, Max-Change: 0.00409
Iteration: 27, Log-Lik: -141706.907, Max-Change: 0.00287
Iteration: 28, Log-Lik: -141706.461, Max-Change: 0.00266
Iteration: 29, Log-Lik: -141706.391, Max-Change: 0.00172
Iteration: 30, Log-Lik: -141706.338, Max-Change: 0.00152
Iteration: 31, Log-Lik: -141706.204, Max-Change: 0.00016
Iteration: 32, Log-Lik: -141706.203, Max-Change: 0.00012
Iteration: 33, Log-Lik: -141706.202, Max-Change: 0.00012
Iteration: 34, Log-Lik: -141706.198, Max-Change: 0.00045
Iteration: 35, Log-Lik: -141706.197, Max-Change: 0.00009
## 
## Calculating information matrix...

We then compute factor score estimates and augment the existing data frame with these estimates, to keep everything in one place. To do the estimation, we use the fscores() function from the mirt package, which takes a fitted model object and computes factor score estimates according to the specified method. We use the EAP (“expected a-posteriori”) method. We specify it explicitly below for clarity, but the results would be identical if the method argument were omitted, since EAP is the function’s default.

# Augment the data with each student's estimated ability (EAP factor score).
# fscores() returns a one-column matrix, so we take its first column to store
# F1 as a plain numeric vector; assigning the matrix directly would create a
# matrix column inside the data frame, which surprises downstream code.
test_scores <- test_scores %>%
  mutate(F1 = fscores(fit_2pl, method = "EAP")[, 1])

We can also calculate the model coefficient estimates using a generic function coef() which is used to extract model coefficients from objects returned by modeling functions. We will set the IRTpars argument to TRUE, which means slope intercept parameters will be converted into traditional IRT parameters.

coefs_2pl <- coef(fit_2pl, IRTpars = TRUE)

The resulting object coefs is a list, with one element for each question, and an additional GroupPars element that we won’t be using. The output is a bit long, so we’re only showing a few of the elements here:

# Show the coefficient matrices for the first three items.
coefs_2pl[1:3]
## $A1_B1
##                 a         b  g  u
## par     1.1065245 -3.397220  0  1
## CI_2.5  0.9615469 -3.747013 NA NA
## CI_97.5 1.2515022 -3.047426 NA NA
## 
## $A2_B0
##                 a        b  g  u
## par     0.6021698 1.460565  0  1
## CI_2.5  0.5117190 1.237888 NA NA
## CI_97.5 0.6926207 1.683242 NA NA
## 
## $A3_B2
##                a          b  g  u
## par     1.310811 -0.7101006  0  1
## CI_2.5  1.235152 -0.7592152 NA NA
## CI_97.5 1.386469 -0.6609860 NA NA
# coefs_2pl[35:37]

Let’s take a closer look at the first element:

# A single element: rows are the estimate (par) and the 95% CI bounds.
coefs_2pl[1]
## $A1_B1
##                 a         b  g  u
## par     1.1065245 -3.397220  0  1
## CI_2.5  0.9615469 -3.747013 NA NA
## CI_97.5 1.2515022 -3.047426 NA NA

In this output:

  • a is discrimination
  • b is difficulty
  • endpoints of the 95% confidence intervals are also shown

To make this output a little more user friendly, we should tidy it such that we have a row per question. We’ll do this in two steps. First, write a function that tidies the output for one question, i.e. one list element. Then, map this function over the list of all questions, resulting in a data frame.

# Tidy one element of the mirt coef() list (a small matrix of estimates and
# CI bounds) into a single-row tibble with columns a_est, a_CI_2.5, a_CI_97.5,
# b_est, b_CI_2.5, b_CI_97.5 (plus an L1 column carrying the item name).
# NOTE(review): relies on reshape2::melt() naming the melted matrix dimensions
# X1/X2 for this input — that naming depends on the input structure and
# package version; verify before reusing elsewhere.
tidy_mirt_coefs <- function(x){
  x %>%
    # melt the list element
    melt() %>%
    # convert to a tibble
    as_tibble() %>%
    # convert factors to characters
    mutate(across(where(is.factor), as.character)) %>%
    # only focus on rows where X2 is a or b (discrimination or difficulty)
    filter(X2 %in% c("a", "b")) %>%
    # in X1, relabel par (parameter) as est (estimate)
    mutate(X1 = if_else(X1 == "par", "est", X1)) %>%
    # unite columns X2 and X1 into a new column called var separated by _
    unite(X2, X1, col = "var", sep = "_") %>%
    # turn into a wider data frame
    pivot_wider(names_from = var, values_from = value)
}

Let’s see what this does to a single element in coefs:

# Sanity check: tidy a single list element -> one-row tibble.
tidy_mirt_coefs(coefs_2pl[1])
## # A tibble: 1 x 7
##   L1    a_est a_CI_2.5 a_CI_97.5 b_est b_CI_2.5 b_CI_97.5
##   <chr> <dbl>    <dbl>     <dbl> <dbl>    <dbl>     <dbl>
## 1 A1_B1  1.11    0.962      1.25 -3.40    -3.75     -3.05

And now let’s map it over all elements of coefs:

# use head(., -1) to remove the last element, `GroupPars`, which does not correspond to a question
# .id = "Question" records each element's name as a column.
tidy_2pl <- map_dfr(head(coefs_2pl, -1), tidy_mirt_coefs, .id = "Question")

A quick peek at the result:

# One row per item, estimate plus CI bounds for both parameters.
tidy_2pl
## # A tibble: 39 x 7
##    Question a_est a_CI_2.5 a_CI_97.5  b_est b_CI_2.5 b_CI_97.5
##    <chr>    <dbl>    <dbl>     <dbl>  <dbl>    <dbl>     <dbl>
##  1 A1_B1    1.11     0.962     1.25  -3.40    -3.75     -3.05 
##  2 A2_B0    0.602    0.512     0.693  1.46     1.24      1.68 
##  3 A3_B2    1.31     1.24      1.39  -0.710   -0.759    -0.661
##  4 A4_B3    1.26     1.18      1.33  -0.586   -0.633    -0.538
##  5 A5_B4    1.04     0.960     1.11  -0.615   -0.684    -0.545
##  6 A6_B5    0.955    0.863     1.05  -2.28    -2.48     -2.09 
##  7 A7_B6    1.43     1.34      1.51  -0.843   -0.892    -0.793
##  8 A8_B7    1.01     0.944     1.08  -0.541   -0.600    -0.482
##  9 A9_B8    1.63     1.53      1.74  -0.344   -0.390    -0.297
## 10 A10_B9   1.41     1.32      1.50  -0.691   -0.746    -0.635
## # ... with 29 more rows

And a nicely formatted table of the result:

# Full parameter table: green colour scale for discrimination,
# blue for difficulty, with spanner headings over each parameter's columns.
tidy_2pl %>%
  gt() %>%
  fmt_number(columns = contains("_"), decimals = 3) %>%
  tab_spanner(label = "Discrimination", columns = contains("a_")) %>%
  tab_spanner(label = "Difficulty", columns = contains("b_")) %>%
  data_color(
    columns = contains("a_"),
    colors = scales::col_numeric(palette = c("Greens"), domain = NULL)
  ) %>%
  data_color(
    columns = contains("b_"),
    colors = scales::col_numeric(palette = c("Blues"), domain = NULL)
  ) %>%
  cols_label(
    a_est = "Est.",
    a_CI_2.5 = "2.5%",
    a_CI_97.5 = "97.5%",
    b_est = "Est.",
    b_CI_2.5 = "2.5%",
    b_CI_97.5 = "97.5%"
  )
Question Discrimination Difficulty
Est. 2.5% 97.5% Est. 2.5% 97.5%
A1_B1 1.107 0.962 1.252 −3.397 −3.747 −3.047
A2_B0 0.602 0.512 0.693 1.461 1.238 1.683
A3_B2 1.311 1.235 1.386 −0.710 −0.759 −0.661
A4_B3 1.255 1.183 1.327 −0.586 −0.633 −0.538
A5_B4 1.036 0.960 1.111 −0.615 −0.684 −0.545
A6_B5 0.955 0.863 1.047 −2.283 −2.475 −2.090
A7_B6 1.425 1.343 1.507 −0.843 −0.892 −0.793
A8_B7 1.010 0.944 1.076 −0.541 −0.600 −0.482
A9_B8 1.635 1.532 1.738 −0.344 −0.390 −0.297
A10_B9 1.406 1.316 1.496 −0.691 −0.746 −0.635
A11_B10 1.302 1.220 1.385 −0.742 −0.799 −0.685
A12_B0 0.783 0.673 0.893 −1.705 −1.936 −1.473
A13_B0 0.769 0.673 0.866 0.585 0.470 0.699
A14_B12 1.361 1.281 1.440 −0.500 −0.547 −0.453
A15_B13 0.975 0.912 1.039 −0.196 −0.249 −0.143
A16_B14 1.249 1.167 1.332 1.200 1.129 1.271
A17_B15 1.204 1.133 1.276 −0.555 −0.605 −0.504
A18_B16 0.879 0.815 0.944 0.601 0.534 0.668
A19_B0 1.077 0.954 1.200 0.175 0.089 0.262
A20_B17 1.807 1.699 1.914 −0.957 −1.004 −0.909
A21_B18 1.636 1.539 1.733 −0.418 −0.461 −0.374
A22_B19 1.636 1.536 1.735 −0.515 −0.561 −0.469
A23_B20 1.425 1.338 1.512 −0.020 −0.063 0.024
A24_B21 1.227 1.149 1.305 −0.715 −0.773 −0.658
A25_B0 0.443 0.358 0.527 −1.543 −1.870 −1.217
A26_B22 1.631 1.486 1.776 −2.000 −2.124 −1.877
A27_B23 1.363 1.275 1.451 −0.105 −0.152 −0.057
A28_B24 1.292 1.205 1.380 −0.339 −0.393 −0.284
A29_B25 0.323 0.270 0.375 −1.381 −1.659 −1.103
A30_B26 0.956 0.890 1.023 −0.899 −0.973 −0.826
A31_B27 1.134 1.066 1.203 0.320 0.271 0.368
A32_B28 1.175 1.094 1.257 1.191 1.119 1.262
A33_B29 0.931 0.860 1.001 −1.620 −1.731 −1.509
A34_B30 0.548 0.482 0.614 −2.447 −2.738 −2.156
A35_B0 0.984 0.846 1.122 −0.463 −0.596 −0.329
A36_B0 0.928 0.802 1.053 0.662 0.542 0.783
A0_B11 0.744 0.674 0.814 −0.731 −0.825 −0.637
A0_B31 0.748 0.670 0.825 −0.068 −0.155 0.020
A0_B32 1.226 1.107 1.344 0.137 0.061 0.213
# Difficulty estimates (with 95% CI error bars) ordered by question number.
tidy_2pl %>% 
  # parse_number() pulls the leading "A" number out of labels like "A14_B12"
  mutate(question_number = parse_number(Question)) %>% 
  ggplot(aes(x = question_number, y = b_est, label = Question)) +
  geom_errorbar(aes(ymin = b_CI_2.5, ymax = b_CI_97.5), width = 0.2) +
  geom_text(colour = "grey") +
  geom_point() +
  theme_minimal() +
  labs(x = "Question",
       y = "Difficulty")

This shows the difficulty and discrimination of each of the questions, highlighting those that were added or removed:

# Scatter of discrimination vs difficulty with CI bars in both directions.
# Joined against test_versions (defined earlier in the document) to colour
# items by whether they were added/removed/unchanged between test versions;
# only changed items get a text label.
# NOTE(review): `width` on the x-direction error bar controls the cap size in
# y units — presumably intentional, but confirm the orientation renders as
# expected in the installed ggplot2 version.
tidy_2pl %>% 
  left_join(test_versions, by = c("Question" = "label")) %>% 
  mutate(qnum = parse_number(Question)) %>% 
  ggplot(aes(x = a_est, y = b_est, label = ifelse(outcome=="unchanged", "", Question), colour = outcome)) +
  geom_errorbar(aes(ymin = b_CI_2.5, ymax = b_CI_97.5), width = 0.1, alpha = 0.5) +
  geom_errorbar(aes(xmin = a_CI_2.5, xmax = a_CI_97.5), width = 0.1, alpha = 0.5) +
  geom_text_repel() +
  geom_point() +
  theme_minimal() +
  labs(x = "Discrimination",
       y = "Difficulty")

Comparing years and classes

Do students from different programmes of study have different distributions of ability?

Differences between years

Compare the distribution of abilities across the year groups (here there are five cohorts, 2017–2021).

# Ridgeline density plot of estimated ability, one ridge per cohort year.
test_scores %>%
  ggplot(aes(x = F1, y = year, fill = as.factor(year), colour = as.factor(year))) +
  geom_density_ridges(alpha = 0.5) +
  # clip the x axis to the plausible ability range
  scale_x_continuous(limits = c(-3.5,3.5)) +
  labs(title = "Density plot", 
       subtitle = "Ability grouped by year of taking the test", 
       x = "Ability", y = "Density",
       fill = "Year", colour = "Year") +
  theme_minimal()
## Picking joint bandwidth of 0.182

Information curves

Test information curve

# Test information curve with standard error overlay.
plot(fit_2pl, type = "infoSE", main = "Test information")

Item information curves

# One information curve per item.
plot(fit_2pl, type = "infotrace", main = "Item information curves")

Response curves

Test response curves

# Expected total score as a function of ability.
plot(fit_2pl, type = "score", auto.key = FALSE)

Item response curves

We can get individual item surface and information plots using the itemplot() function from the mirt package, e.g.

mirt::itemplot(fit_2pl, item = 1, 
               main = "Trace lines for item 1")

We can also get the plots for all trace lines, one facet per plot.

# Trace lines (item characteristic curves), one facet per item.
plot(fit_2pl, type = "trace", auto.key = FALSE)

Or all of them overlaid in one plot.

# All trace lines overlaid on a single panel.
plot(fit_2pl, type = "trace", facet_items=FALSE)

An alternative approach is using ggplot2 and plotly to add interactivity to make it easier to identify items.

# store the lattice object rather than printing it
plt <- plot(fit_2pl, type = "trace", facet_items = FALSE)
# the data we need is in panel.args
# TODO - I had to change the [[1]] to [[2]] since the plt has two panels for some reason, with the one we want being the 2nd panel
panel <- plt$panel.args[[2]]
# Derive the number of theta grid points per item from the data instead of
# hard-coding 200, so this survives a change to mirt's plotting grid.
pts_per_item <- length(panel$x) / ncol(item_scores)
plt_data <- tibble(
  x          = panel$x,
  y          = panel$y,
  subscripts = panel$subscripts,
  item       = rep(colnames(item_scores), each = pts_per_item)
) %>%
  mutate(
    # Extract the leading "A" question number for ordering the items.
    # parse_number("A14_B12") -> 14. The previous
    # str_remove(item, "A") %>% as.numeric() produced NA for every item,
    # because the stripped labels (e.g. "1_B1") are not parseable numbers,
    # so fct_reorder() silently had no effect.
    item_no = parse_number(item),
    item    = fct_reorder(item, item_no)
    )

# Preview of the reshaped trace-line data.
# NOTE(review): item_no is NA in this output — str_remove(item, "A") leaves
# strings like "1_B1" that as.numeric() cannot parse, so the fct_reorder()
# in the previous chunk did not actually reorder the items.
head(plt_data)
## # A tibble: 6 x 5
##       x      y subscripts item  item_no
##   <dbl>  <dbl>      <int> <fct>   <dbl>
## 1 -6    0.0531        201 A1_B1      NA
## 2 -5.94 0.0566        202 A1_B1      NA
## 3 -5.88 0.0603        203 A1_B1      NA
## 4 -5.82 0.0642        204 A1_B1      NA
## 5 -5.76 0.0683        205 A1_B1      NA
## 6 -5.70 0.0727        206 A1_B1      NA
# Static ggplot version of the trace lines; the `text` aesthetic carries the
# item name so plotly can show it in the hover tooltip.
plt_gg <- plt_data %>%
  ggplot(aes(x = x, y = y, colour = item, text = item)) +
  geom_line() +
  labs(
    title = "2PL - Trace lines",
    # expression(theta) would render nicely in ggplot but not in plotly,
    # so plain-text axis labels are used instead
    x = "theta",
    y = "P(theta)",
    colour = "Item"
  ) +
  theme_minimal() +
  # the legend would be huge with ~39 items; identification happens via hover
  theme(legend.position = "none")

# Interactive version: hovering a line shows the item name.
ggplotly(plt_gg, tooltip = "text")
knitr::knit_exit()